library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.2 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggpubr)
library(ggsci)
library(ggExtra)
library(patchwork)
# Pin the dplyr versions of select()/mutate() so that a same-named function
# from another attached package cannot shadow them.
select <- dplyr::select
mutate <- dplyr::mutate
Load pre-processed vcf data and metadata
# Read the pre-processed per-sample / per-position / per-allele table and
# preview its first rows.
df_seq <- readRDS("../data/processed_data/christos_2021_NGmerge.rds")
df_seq %>% head()
| SAMPLE | POS | ID | REF | ALT | QUAL | FILTER | VCF_INFO | FORMAT | ALLELE | FREQ | MUT_TYPE | READ_DEPTH | EDIT_TYPE | EDITOR | SITE | REPLICATE | Kreads | >=Q30 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| P20657_1010 | 7 | . | A | <*> | 0 | . | DP=10;I16=1,0,0,0,14,196,0,0,60,3600,0,0,0,0,0,0;QS=1,0;MQ0F=0 | PL:AD | A | 0 | REF | 0 | reference | AID | A | 1 | 183.45 | 85.82 |
| P20657_1010 | 7 | . | A | <*> | 0 | . | DP=10;I16=1,0,0,0,14,196,0,0,60,3600,0,0,0,0,0,0;QS=1,0;MQ0F=0 | PL:AD | D | 0 | SNP | 0 | A_to_D | AID | A | 1 | 183.45 | 85.82 |
| P20657_1018 | 7 | . | A | <*> | 0 | . | DP=10;I16=1,0,0,0,14,196,0,0,60,3600,0,0,0,0,0,0;QS=1,0;MQ0F=0 | PL:AD | A | 0 | REF | 0 | reference | AID | C | 3 | 853.00 | 97.40 |
| P20657_1018 | 7 | . | A | <*> | 0 | . | DP=10;I16=1,0,0,0,14,196,0,0,60,3600,0,0,0,0,0,0;QS=1,0;MQ0F=0 | PL:AD | D | 0 | SNP | 0 | A_to_D | AID | C | 3 | 853.00 | 97.40 |
| P20657_1041 | 7 | . | A | <*> | 0 | . | DP=10;I16=1,0,0,0,14,196,0,0,60,3600,0,0,0,0,0,0;QS=1,0;MQ0F=0 | PL:AD | A | 0 | REF | 0 | reference | Nish | B | 2 | 159.23 | 96.26 |
| P20657_1041 | 7 | . | A | <*> | 0 | . | DP=10;I16=1,0,0,0,14,196,0,0,60,3600,0,0,0,0,0,0;QS=1,0;MQ0F=0 | PL:AD | D | 0 | SNP | 0 | A_to_D | Nish | B | 2 | 159.23 | 96.26 |
Let’s begin by plotting read depth across the sequence per sample
# Median READ_DEPTH per SAMPLE (first rows only).
# summarise() over a single grouping variable already returns an ungrouped
# tibble, so no explicit ungroup() is needed before head().
df_seq %>%
  group_by(SAMPLE) %>%
  summarise(median_read_depth = median(READ_DEPTH)) %>%
  head()
| SAMPLE | median_read_depth |
|---|---|
| P20657_1001 | 136 |
| P20657_1002 | 196 |
| P20657_1003 | 125 |
| P20657_1004 | 123 |
| P20657_1005 | 61 |
| P20657_1006 | 22 |
# Read depth at every position, one point per row of df_seq,
# grouped by SAMPLE and coloured by EDITOR.
df_seq %>%
  ggplot(aes(x = POS, y = READ_DEPTH, group = SAMPLE, color = EDITOR)) +
  geom_point() +
  labs(x = "CAN1 position", y = "Read depth") +
  theme_bw() +
  theme(legend.position = "bottom")
# Same read-depth plot restricted to EDITOR == "2x", coloured by REPLICATE.
df_seq %>%
  filter(EDITOR == "2x") %>%
  ggplot(aes(x = POS, y = READ_DEPTH, group = SAMPLE, color = REPLICATE)) +
  geom_point() +
  labs(x = "CAN1 position", y = "Read depth") +
  theme_bw() +
  theme(legend.position = "bottom")
Let’s look at the number of mutations per sample and compare it to read depth
# Per-sample averages of read depth and of the raw allele counts in FREQ.
# Both statistics are arithmetic means, so the columns are named mean_*
# (they were previously labelled median_* while being computed with mean()).
# No MUT_TYPE filter is applied here, so reference-allele rows are included,
# and FREQ has not yet been normalized by READ_DEPTH at this point.
df_seq %>%
  group_by(SAMPLE) %>%
  summarise(
    mean_read_depth = mean(READ_DEPTH),
    mean_mutation_freq = mean(FREQ)
  ) %>%
  ungroup()
| SAMPLE | median_read_depth | median_mutation_freq |
|---|---|---|
| P20657_1001 | 57210.1531 | 14302.54198 |
| P20657_1002 | 78848.9830 | 19712.24767 |
| P20657_1003 | 34490.6209 | 8622.65818 |
| P20657_1004 | 68929.2964 | 17232.32824 |
| P20657_1005 | 61622.9601 | 15405.74300 |
| P20657_1006 | 122290.5891 | 30572.65734 |
| P20657_1007 | 55732.1781 | 13933.04453 |
| P20657_1008 | 104411.5038 | 26102.87659 |
| P20657_1009 | 137252.6862 | 34313.17303 |
| P20657_1010 | 35718.5182 | 8929.63232 |
| P20657_1011 | 39823.2044 | 9955.80492 |
| P20657_1012 | 27889.5055 | 6972.37701 |
| P20657_1013 | 143.1450 | 35.78626 |
| P20657_1014 | 46253.4707 | 11563.37235 |
| P20657_1015 | 116956.9712 | 29239.25106 |
| P20657_1016 | 161131.9432 | 40282.98685 |
| P20657_1017 | 543.3656 | 135.84139 |
| P20657_1018 | 192380.2010 | 48095.05174 |
| P20657_1019 | 33629.9444 | 8407.49109 |
| P20657_1020 | 48188.3206 | 12047.08185 |
| P20657_1021 | 36623.0492 | 9155.76463 |
| P20657_1022 | 49686.3198 | 12421.58439 |
| P20657_1023 | 33060.8104 | 8265.20738 |
| P20657_1024 | 37720.4288 | 9430.11069 |
| P20657_1025 | 166698.4003 | 41674.60093 |
| P20657_1026 | 154900.1247 | 38725.03308 |
| P20657_1027 | 63063.1807 | 15765.79517 |
| P20657_1028 | 48407.9249 | 12101.98219 |
| P20657_1029 | 53170.5674 | 13292.64249 |
| P20657_1030 | 39140.3715 | 9785.09372 |
| P20657_1031 | 32086.0199 | 8021.50763 |
| P20657_1032 | 55059.6187 | 13764.91009 |
| P20657_1033 | 35040.1662 | 8760.04326 |
| P20657_1034 | 145882.9779 | 36470.74640 |
| P20657_1035 | 192352.8753 | 48088.21968 |
| P20657_1036 | 93333.7294 | 23333.43299 |
| P20657_1037 | 48340.8469 | 12085.21332 |
| P20657_1038 | 70347.0335 | 17586.76124 |
| P20657_1039 | 56317.3363 | 14079.33545 |
| P20657_1040 | 29928.4822 | 7482.12256 |
| P20657_1041 | 48556.2642 | 12139.07082 |
| P20657_1042 | 34413.6293 | 8603.40967 |
| P20657_1043 | 99475.5004 | 24868.87532 |
| P20657_1044 | 135857.8991 | 33964.47498 |
| P20657_1045 | 135367.4037 | 33841.85199 |
| P20657_1046 | 13360.9822 | 3340.24597 |
| P20657_1047 | 79826.4169 | 19956.60517 |
| P20657_1048 | 55364.2498 | 13841.06404 |
| P20657_1049 | 58121.8202 | 14530.45929 |
| P20657_1050 | 49723.5030 | 12430.88041 |
| P20657_1051 | 46804.9534 | 11701.24088 |
| P20657_1052 | 35318.2655 | 8829.56658 |
| P20657_1053 | 41236.1137 | 10309.02884 |
| P20657_1054 | 19853.9779 | 4963.49491 |
| P20657_1055 | 42293.0017 | 10573.25064 |
| P20657_1056 | 29868.1289 | 7467.03308 |
| P20657_1057 | 47496.4228 | 11874.10941 |
| P20657_1058 | 30180.1506 | 7545.04029 |
| P20657_1059 | 20493.5488 | 5123.38889 |
| P20657_1060 | 62136.8584 | 15534.21459 |
| P20657_1061 | 75452.3011 | 18863.07591 |
| P20657_1062 | 43026.4928 | 10756.62341 |
| P20657_1063 | 34489.6476 | 8622.41264 |
| P20657_1064 | 36297.0407 | 9074.26209 |
| P20657_1065 | 27371.7578 | 6842.94020 |
| P20657_1066 | 47682.8469 | 11920.71501 |
Remove low coverage regions
# Keep only rows whose READ_DEPTH exceeds 5000, then preview the result.
df_seq <- filter(df_seq, READ_DEPTH > 5000)
head(df_seq)
| SAMPLE | POS | ID | REF | ALT | QUAL | FILTER | VCF_INFO | FORMAT | ALLELE | FREQ | MUT_TYPE | READ_DEPTH | EDIT_TYPE | EDITOR | SITE | REPLICATE | Kreads | >=Q30 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| P20657_1010 | 9 | . | T | G,C,<*> | 0 | . | DP=2959439;I16=2.95674e+06,1,653,0,9.67652e+07,3.17134e+09,9411,136033,1.77404e+08,1.06443e+10,39180,2.3508e+06,846,3998,0,0;QS=59.9979,0.00158485,0.000471937,0;VDB=0;SGB=-10413;RPB=0.999984;MQB=1;MQSB=1;BQB=0;MQ0F=0 | PL:AD | T | 110498 | REF | 110532 | reference | AID | A | 1 | 183.45 | 85.82 |
| P20657_1010 | 9 | . | T | G,C,<*> | 0 | . | DP=2959439;I16=2.95674e+06,1,653,0,9.67652e+07,3.17134e+09,9411,136033,1.77404e+08,1.06443e+10,39180,2.3508e+06,846,3998,0,0;QS=59.9979,0.00158485,0.000471937,0;VDB=0;SGB=-10413;RPB=0.999984;MQB=1;MQSB=1;BQB=0;MQ0F=0 | PL:AD | G | 24 | SNP | 110532 | T_to_G | AID | A | 1 | 183.45 | 85.82 |
| P20657_1010 | 9 | . | T | G,C,<*> | 0 | . | DP=2959439;I16=2.95674e+06,1,653,0,9.67652e+07,3.17134e+09,9411,136033,1.77404e+08,1.06443e+10,39180,2.3508e+06,846,3998,0,0;QS=59.9979,0.00158485,0.000471937,0;VDB=0;SGB=-10413;RPB=0.999984;MQB=1;MQSB=1;BQB=0;MQ0F=0 | PL:AD | C | 10 | SNP | 110532 | T_to_C | AID | A | 1 | 183.45 | 85.82 |
| P20657_1010 | 9 | . | T | G,C,<*> | 0 | . | DP=2959439;I16=2.95674e+06,1,653,0,9.67652e+07,3.17134e+09,9411,136033,1.77404e+08,1.06443e+10,39180,2.3508e+06,846,3998,0,0;QS=59.9979,0.00158485,0.000471937,0;VDB=0;SGB=-10413;RPB=0.999984;MQB=1;MQSB=1;BQB=0;MQ0F=0 | PL:AD | D | 0 | SNP | 110532 | T_to_D | AID | A | 1 | 183.45 | 85.82 |
| P20657_1002 | 9 | . | T | G,C,<*> | 0 | . | DP=2959439;I16=2.95674e+06,1,653,0,9.67652e+07,3.17134e+09,9411,136033,1.77404e+08,1.06443e+10,39180,2.3508e+06,846,3998,0,0;QS=59.9979,0.00158485,0.000471937,0;VDB=0;SGB=-10413;RPB=0.999984;MQB=1;MQSB=1;BQB=0;MQ0F=0 | PL:AD | T | 244018 | REF | 244093 | reference | WT | A | 2 | 285.05 | 94.97 |
| P20657_1002 | 9 | . | T | G,C,<*> | 0 | . | DP=2959439;I16=2.95674e+06,1,653,0,9.67652e+07,3.17134e+09,9411,136033,1.77404e+08,1.06443e+10,39180,2.3508e+06,846,3998,0,0;QS=59.9979,0.00158485,0.000471937,0;VDB=0;SGB=-10413;RPB=0.999984;MQB=1;MQSB=1;BQB=0;MQ0F=0 | PL:AD | G | 62 | SNP | 244093 | T_to_G | WT | A | 2 | 285.05 | 94.97 |
Trim edges of each region
# Keep positions strictly inside one of three windows:
# (52, 155), (730, 840) or (1580, 1681).
df_seq <- df_seq %>%
  filter(
    (POS > 52 & POS < 155) |
      (POS > 730 & POS < 840) |
      (POS > 1580 & POS < 1681)
  )
Normalize by sequencing depth
# Normalize allele counts by sequencing depth.
#   NORM_FREQ     = (FREQ + 1) / READ_DEPTH   (+1 is a pseudocount, so the
#                                              log2 below is finite when FREQ is 0)
#   LOG_NORM_FREQ = log2(NORM_FREQ)
# Both columns are element-wise row computations, so grouping by SAMPLE/POS
# is unnecessary and only slows the mutate down. ungroup() is kept so the
# result is guaranteed to be ungrouped, as before.
df_seq <- df_seq %>%
  ungroup() %>%
  mutate(
    NORM_FREQ = (FREQ + 1) / READ_DEPTH,
    LOG_NORM_FREQ = log2(NORM_FREQ)
  )
df_seq %>% head()
| SAMPLE | POS | ID | REF | ALT | QUAL | FILTER | VCF_INFO | FORMAT | ALLELE | FREQ | MUT_TYPE | READ_DEPTH | EDIT_TYPE | EDITOR | SITE | REPLICATE | Kreads | >=Q30 | NORM_FREQ | LOG_NORM_FREQ |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| P20657_1010 | 53 | . | A | G,T,C | 0 | . | DP=2962705;I16=2.96189e+06,1,450,0,1.17538e+08,4.66631e+09,16848,637412,1.77713e+08,1.06628e+10,27000,1.62e+06,7.40472e+07,1.85118e+09,11250,281250;QS=59.997,0.00246233,0.000407491,0.000115567;VDB=0;SGB=-6749.3;RPB=0.999986;MQB=1;MQSB=1;BQB=0;MQ0F=0 | PL:AD | A | 110641 | REF | 110670 | reference | AID | A | 1 | 183.45 | 85.82 | 0.9997470 | -0.0003651 |
| P20657_1010 | 53 | . | A | G,T,C | 0 | . | DP=2962705;I16=2.96189e+06,1,450,0,1.17538e+08,4.66631e+09,16848,637412,1.77713e+08,1.06628e+10,27000,1.62e+06,7.40472e+07,1.85118e+09,11250,281250;QS=59.997,0.00246233,0.000407491,0.000115567;VDB=0;SGB=-6749.3;RPB=0.999986;MQB=1;MQSB=1;BQB=0;MQ0F=0 | PL:AD | G | 21 | SNP | 110670 | A_to_G | AID | A | 1 | 183.45 | 85.82 | 0.0001988 | -12.2964731 |
| P20657_1010 | 53 | . | A | G,T,C | 0 | . | DP=2962705;I16=2.96189e+06,1,450,0,1.17538e+08,4.66631e+09,16848,637412,1.77713e+08,1.06628e+10,27000,1.62e+06,7.40472e+07,1.85118e+09,11250,281250;QS=59.997,0.00246233,0.000407491,0.000115567;VDB=0;SGB=-6749.3;RPB=0.999986;MQB=1;MQSB=1;BQB=0;MQ0F=0 | PL:AD | T | 6 | SNP | 110670 | A_to_T | AID | A | 1 | 183.45 | 85.82 | 0.0000633 | -13.9485497 |
| P20657_1010 | 53 | . | A | G,T,C | 0 | . | DP=2962705;I16=2.96189e+06,1,450,0,1.17538e+08,4.66631e+09,16848,637412,1.77713e+08,1.06628e+10,27000,1.62e+06,7.40472e+07,1.85118e+09,11250,281250;QS=59.997,0.00246233,0.000407491,0.000115567;VDB=0;SGB=-6749.3;RPB=0.999986;MQB=1;MQSB=1;BQB=0;MQ0F=0 | PL:AD | C | 2 | SNP | 110670 | A_to_C | AID | A | 1 | 183.45 | 85.82 | 0.0000271 | -15.1709422 |
| P20657_1002 | 53 | . | A | G,T,C | 0 | . | DP=2962705;I16=2.96189e+06,1,450,0,1.17538e+08,4.66631e+09,16848,637412,1.77713e+08,1.06628e+10,27000,1.62e+06,7.40472e+07,1.85118e+09,11250,281250;QS=59.997,0.00246233,0.000407491,0.000115567;VDB=0;SGB=-6749.3;RPB=0.999986;MQB=1;MQSB=1;BQB=0;MQ0F=0 | PL:AD | A | 244378 | REF | 244419 | reference | WT | A | 2 | 285.05 | 94.97 | 0.9998363 | -0.0002361 |
| P20657_1002 | 53 | . | A | G,T,C | 0 | . | DP=2962705;I16=2.96189e+06,1,450,0,1.17538e+08,4.66631e+09,16848,637412,1.77713e+08,1.06628e+10,27000,1.62e+06,7.40472e+07,1.85118e+09,11250,281250;QS=59.997,0.00246233,0.000407491,0.000115567;VDB=0;SGB=-6749.3;RPB=0.999986;MQB=1;MQSB=1;BQB=0;MQ0F=0 | PL:AD | G | 35 | SNP | 244419 | A_to_G | WT | A | 2 | 285.05 | 94.97 | 0.0001473 | -12.7290719 |
Let’s calculate the frequency of all alternative alleles at each position
# For each SAMPLE x POS, sum NORM_FREQ over the rows flagged MUT_TYPE == "SNP"
# and store that total on every SNP row; non-SNP rows get NA.
# NOTE(review): LOG_TOTAL_MUT_FREQ uses the natural log, whereas LOG_NORM_FREQ
# is computed with log2 -- confirm the mixed bases are intended.
df_seq <- df_seq %>%
  group_by(SAMPLE, POS) %>%
  mutate(
    TOTAL_MUT_FREQ = if_else(
      MUT_TYPE == "SNP",
      sum(NORM_FREQ[MUT_TYPE == "SNP"]),
      NA_real_
    )
  ) %>%
  ungroup() %>%
  mutate(LOG_TOTAL_MUT_FREQ = log(TOTAL_MUT_FREQ))
First, we calculate the average mutation frequency at each base for the WT samples
# Per-position background: mean TOTAL_MUT_FREQ over the SNP rows of the
# EDITOR == "WT" samples. Grouping on a single variable means summarise()
# already returns an ungrouped tibble.
df_mut_freq_wt <- df_seq %>%
  filter(MUT_TYPE == "SNP", EDITOR == "WT") %>%
  group_by(POS) %>%
  summarise(MEAN_TOTAL_MUT_FREQ_WT = mean(TOTAL_MUT_FREQ))
# Attach the per-position WT background to every row of df_seq.
# left_join() keeps exactly the rows already present in df_seq (a full_join
# could append rows that exist only in the lookup table); positions without
# a WT value get NA in MEAN_TOTAL_MUT_FREQ_WT.
df_seq <- df_seq %>%
  left_join(df_mut_freq_wt, by = "POS")
Let’s repeat this, but for each individual transition.
# Per-position, per-edit-type background: mean NORM_FREQ over the SNP rows of
# the EDITOR == "WT" samples.
# .groups = "drop" returns an ungrouped tibble directly and silences the
# "summarise() has grouped output by 'POS'" message.
df_mut_freq_wt_trans <- df_seq %>%
  filter(MUT_TYPE == "SNP", EDITOR == "WT") %>%
  group_by(POS, EDIT_TYPE) %>%
  summarise(MEAN_NORM_FREQ_WT = mean(NORM_FREQ), .groups = "drop")
## `summarise()` has grouped output by 'POS'. You can override using the `.groups` argument.
# Attach the WT background for the matching POS / EDIT_TYPE to every row.
# left_join() guarantees the row set of df_seq is unchanged; combinations
# with no WT value get NA in MEAN_NORM_FREQ_WT.
df_seq <- df_seq %>%
  left_join(df_mut_freq_wt_trans, by = c("POS", "EDIT_TYPE"))
Let’s now subtract the T0 (wild-type background) alternative-allele frequency. We define the result as the mutation enrichment of a given base.
# Keep SNP rows only (this permanently drops the other rows from df_seq) and
# compute the background-subtracted total frequency, floored at zero.
df_seq <- df_seq %>%
  filter(MUT_TYPE == "SNP") %>%
  mutate(
    TOTAL_MUT_ENRICH = pmax(TOTAL_MUT_FREQ - MEAN_TOTAL_MUT_FREQ_WT, 0)
  )
Also subtract T0 from the individual transitions
# Per-edit-type enrichment: NORM_FREQ minus the WT mean for the same
# POS / EDIT_TYPE, with negative differences set to zero.
df_seq <- df_seq %>%
  filter(MUT_TYPE == "SNP") %>%
  mutate(MUT_ENRICH = pmax(NORM_FREQ - MEAN_NORM_FREQ_WT, 0))
WT
# WT samples: TOTAL_MUT_ENRICH along the gene, one facet per SITE,
# points coloured by REPLICATE; y axis limited to [0, 0.015].
# (disabled layers: anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM)
df_seq %>%
  filter(EDITOR == "WT", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH, color = REPLICATE)) +
  geom_point() +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  ylim(0, 0.015) +
  labs(
    title = "Wild-type",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "bottom", aspect.ratio = 0.7)
# AID-only samples: TOTAL_MUT_ENRICH along the gene, one facet per SITE,
# points coloured by REPLICATE; y axis limited to [0, 0.015].
# (disabled layers: anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM)
df_seq %>%
  filter(EDITOR == "AID", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH, color = REPLICATE)) +
  geom_point() +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  ylim(0, 0.015) +
  labs(
    title = "AID only",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "bottom", aspect.ratio = 0.7)
Nishida site
# Nishida gRNA samples: TOTAL_MUT_ENRICH along the gene, faceted by SITE and
# coloured by REPLICATE. A solid black band is drawn at 767-769 in facet "B"
# (presumably the PAM / target position -- confirm).
# (disabled layers: anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM)
df_seq %>%
  filter(EDITOR == "Nish", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH, color = REPLICATE)) +
  geom_point() +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 767, xmax = 769, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  ylim(0, 0.015) +
  labs(
    title = "Nishida gRNA",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "bottom", aspect.ratio = 0.7)
# Nishida gRNA samples, C->T and A->G edits only: LOG_NORM_FREQ along the
# gene, faceted by SITE and coloured by REPLICATE.
# Facet "B" gets a solid band at 767-769 and a translucent band at 748-788
# (presumably PAM and gRNA window -- confirm).
# Fix: the y-axis label previously read "muation".
# (disabled layers: anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM)
df_seq %>%
  filter(EDITOR == "Nish") %>%
  filter(MUT_TYPE == "SNP") %>%
  filter(EDIT_TYPE %in% c("C_to_T", "A_to_G")) %>%
  ggplot(aes(x = POS, y = LOG_NORM_FREQ, color = REPLICATE)) +
  geom_point() +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  theme_bw() +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 767, xmax = 769, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 748, xmax = 788, ymin = -Inf, ymax = Inf),
    alpha = 0.1, fill = "black", inherit.aes = FALSE
  ) +
  theme(legend.position = "bottom") +
  ylab("Log normalized mutation frequency") +
  ggtitle("Nishida gRNA", subtitle = "C->T & A->G only") +
  xlab("Position on gene") +
  theme(aspect.ratio = 0.7)
# Nishida gRNA samples restricted to C_to_T, C_to_A, G_to_T and G_to_A edits:
# LOG_NORM_FREQ along the gene, faceted by SITE and coloured by REPLICATE.
# Facet "B" gets a solid band at 767-769 and a translucent band at 748-788
# (presumably PAM and gRNA window -- confirm).
# Fix: the y-axis label previously read "muation".
# NOTE(review): the subtitle says "G transitions" although the filter also
# keeps edits of C -- confirm the intended wording.
# (disabled layers: anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM)
df_seq %>%
  filter(EDITOR == "Nish") %>%
  filter(MUT_TYPE == "SNP") %>%
  filter(EDIT_TYPE %in% c("C_to_T", "C_to_A", "G_to_T", "G_to_A")) %>%
  ggplot(aes(x = POS, y = LOG_NORM_FREQ, color = REPLICATE)) +
  geom_point() +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  theme_bw() +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 767, xmax = 769, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 748, xmax = 788, ymin = -Inf, ymax = Inf),
    alpha = 0.1, fill = "black", inherit.aes = FALSE
  ) +
  theme(legend.position = "bottom") +
  ylab("Log normalized mutation frequency") +
  ggtitle("Nishida gRNA", subtitle = "G transitions") +
  xlab("Position on gene") +
  theme(aspect.ratio = 0.7)
Compare overall mutation frequency across sites
# Nishida gRNA samples: distribution of TOTAL_MUT_ENRICH per SITE as violins
# (median line drawn), with pairwise site comparisons added by
# ggpubr::stat_compare_means().
df_seq %>%
  filter(EDITOR == "Nish", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = SITE, y = TOTAL_MUT_ENRICH, fill = SITE)) +
  geom_violin(draw_quantiles = 0.5) +
  stat_compare_means(
    comparisons = list(c("A", "B"), c("B", "C"), c("A", "C"))
  ) +
  labs(
    title = "Nishida gRNA",
    subtitle = "All mutations",
    x = "CAN1 site",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw()
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values
# Nishida gRNA samples, C->T and A->G edits only: distribution of
# LOG_NORM_FREQ per SITE as violins (median line drawn), with pairwise site
# comparisons added by ggpubr::stat_compare_means().
df_seq %>%
  filter(
    EDITOR == "Nish",
    MUT_TYPE == "SNP",
    EDIT_TYPE %in% c("C_to_T", "A_to_G")
  ) %>%
  ggplot(aes(x = SITE, y = LOG_NORM_FREQ, fill = SITE)) +
  geom_violin(draw_quantiles = 0.5) +
  stat_compare_means(
    comparisons = list(c("A", "B"), c("B", "C"), c("A", "C"))
  ) +
  labs(
    title = "Nishida gRNA",
    subtitle = "C->T & A->G only",
    x = "CAN1 site",
    y = "Log normalized mutation frequency"
  ) +
  theme_bw()
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values
gRNA7 site
# gRNA7 samples: TOTAL_MUT_ENRICH along the gene, faceted by SITE and
# coloured by REPLICATE. Facet "A" gets a solid band at 108-110 and a
# translucent band at 89-129 (presumably PAM and gRNA window -- confirm).
# (disabled layers: anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM)
df_seq %>%
  filter(EDITOR == "gRNA7", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH, color = REPLICATE)) +
  geom_point() +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "A"),
    aes(xmin = 108, xmax = 110, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  geom_rect(
    data = data.frame(SITE = "A"),
    aes(xmin = 89, xmax = 129, ymin = -Inf, ymax = Inf),
    alpha = 0.1, fill = "black", inherit.aes = FALSE
  ) +
  ylim(0, 0.015) +
  labs(
    title = "gRNA 7",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "bottom", aspect.ratio = 0.7)
# gRNA7 samples: same TOTAL_MUT_ENRICH plot, but points are coloured by the
# reference base (REF) instead of the replicate.
# (disabled layers: anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM)
df_seq %>%
  filter(EDITOR == "gRNA7", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH, color = REF)) +
  geom_point() +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "A"),
    aes(xmin = 108, xmax = 110, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  geom_rect(
    data = data.frame(SITE = "A"),
    aes(xmin = 89, xmax = 129, ymin = -Inf, ymax = Inf),
    alpha = 0.1, fill = "black", inherit.aes = FALSE
  ) +
  ylim(0, 0.015) +
  labs(
    title = "gRNA 7",
    subtitle = "All mutations, colored by edited base",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "bottom", aspect.ratio = 0.7)
# gRNA7 samples, C->T and A->G edits only: LOG_NORM_FREQ along the gene,
# faceted by SITE and coloured by REPLICATE, with the same two bands in
# facet "A" (108-110 solid, 89-129 translucent).
# (disabled layers: anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM)
df_seq %>%
  filter(
    EDITOR == "gRNA7",
    MUT_TYPE == "SNP",
    EDIT_TYPE %in% c("C_to_T", "A_to_G")
  ) %>%
  ggplot(aes(x = POS, y = LOG_NORM_FREQ, color = REPLICATE)) +
  geom_point() +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "A"),
    aes(xmin = 108, xmax = 110, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  geom_rect(
    data = data.frame(SITE = "A"),
    aes(xmin = 89, xmax = 129, ymin = -Inf, ymax = Inf),
    alpha = 0.1, fill = "black", inherit.aes = FALSE
  ) +
  labs(
    title = "gRNA 7",
    subtitle = "C->T & A->G only",
    x = "Position on gene",
    y = "Log normalized mutation frequency"
  ) +
  theme_bw() +
  theme(legend.position = "bottom", aspect.ratio = 0.7)
Alternative site
# "Altern" samples: TOTAL_MUT_ENRICH along the gene, faceted by SITE and
# coloured by REPLICATE. Facet "B" gets a solid band at 806-808 and a
# translucent band at 787-827 (presumably PAM and gRNA window -- confirm).
# (disabled layers: anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM)
df_seq %>%
  filter(EDITOR == "Altern", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH, color = REPLICATE)) +
  geom_point() +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 806, xmax = 808, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 787, xmax = 827, ymin = -Inf, ymax = Inf),
    alpha = 0.1, fill = "black", inherit.aes = FALSE
  ) +
  ylim(0, 0.015) +
  labs(
    title = "Alternative gRNA site",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "bottom", aspect.ratio = 0.7)
# "Altern" samples: same TOTAL_MUT_ENRICH plot, but points are coloured by
# the reference base (REF) instead of the replicate.
# (disabled layers: anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM)
df_seq %>%
  filter(EDITOR == "Altern", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH, color = REF)) +
  geom_point() +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 806, xmax = 808, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 787, xmax = 827, ymin = -Inf, ymax = Inf),
    alpha = 0.1, fill = "black", inherit.aes = FALSE
  ) +
  ylim(0, 0.015) +
  labs(
    title = "Alternative gRNA site",
    subtitle = "All mutations, colored by edited base",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "bottom", aspect.ratio = 0.7)
# "Altern" samples, C->T and A->G edits only: LOG_NORM_FREQ along the gene,
# faceted by SITE and coloured by REPLICATE, with the two bands in facet "B"
# (806-808 solid, 787-827 translucent). The legend is hidden in this plot.
# (disabled layers: anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM)
df_seq %>%
  filter(
    EDITOR == "Altern",
    MUT_TYPE == "SNP",
    EDIT_TYPE %in% c("C_to_T", "A_to_G")
  ) %>%
  ggplot(aes(x = POS, y = LOG_NORM_FREQ, color = REPLICATE)) +
  geom_point() +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 806, xmax = 808, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 787, xmax = 827, ymin = -Inf, ymax = Inf),
    alpha = 0.1, fill = "black", inherit.aes = FALSE
  ) +
  labs(
    title = "Alternative gRNA site",
    subtitle = "C->T & A->G only",
    x = "Position on gene",
    y = "Log normalized mutation frequency"
  ) +
  theme_bw() +
  theme(legend.position = "none", aspect.ratio = 0.7)
PmCDA1
# PmCDA1 samples: TOTAL_MUT_ENRICH along the gene, faceted by SITE and
# coloured by REPLICATE. Facet "B" gets a thin solid band at 806.8-807.2 and
# a translucent band at 787-827 (meaning of the bands not stated -- confirm).
# (disabled layers: anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM)
df_seq %>%
  filter(EDITOR == "PmCDA1", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH, color = REPLICATE)) +
  geom_point() +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 806.8, xmax = 807.2, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 787, xmax = 827, ymin = -Inf, ymax = Inf),
    alpha = 0.1, fill = "black", inherit.aes = FALSE
  ) +
  ylim(0, 0.015) +
  labs(
    title = "PmCDA1",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "bottom", aspect.ratio = 0.7)
# PmCDA1 samples: same TOTAL_MUT_ENRICH plot, but points are coloured by the
# reference base (REF) instead of the replicate.
# (disabled layers: anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM)
df_seq %>%
  filter(EDITOR == "PmCDA1", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH, color = REF)) +
  geom_point() +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 806.8, xmax = 807.2, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 787, xmax = 827, ymin = -Inf, ymax = Inf),
    alpha = 0.1, fill = "black", inherit.aes = FALSE
  ) +
  ylim(0, 0.015) +
  labs(
    title = "PmCDA1",
    subtitle = "All mutations, colored by edited base",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "bottom", aspect.ratio = 0.7)
# PmCDA1 samples, C->T and A->G edits only: LOG_NORM_FREQ along the gene,
# faceted by SITE and coloured by REPLICATE, with the two bands in facet "B"
# (806.8-807.2 solid, 787-827 translucent).
# Fix: the plot title previously read "PmDCA1"; it now matches the EDITOR
# value "PmCDA1" used in the filter.
# (disabled layers: anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM)
df_seq %>%
  filter(EDITOR == "PmCDA1") %>%
  filter(MUT_TYPE == "SNP") %>%
  filter(EDIT_TYPE %in% c("C_to_T", "A_to_G")) %>%
  ggplot(aes(x = POS, y = LOG_NORM_FREQ, color = REPLICATE)) +
  geom_point() +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  theme_bw() +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 806.8, xmax = 807.2, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 787, xmax = 827, ymin = -Inf, ymax = Inf),
    alpha = 0.1, fill = "black", inherit.aes = FALSE
  ) +
  theme(legend.position = "bottom") +
  ylab("Log normalized mutation frequency") +
  ggtitle("PmCDA1", subtitle = "C->T & A->G only") +
  xlab("Position on gene") +
  theme(aspect.ratio = 0.7)
# "Altern" samples: TOTAL_MUT_ENRICH along the gene with semi-transparent
# points, faceted by SITE and coloured by REPLICATE; only the solid band at
# 806-808 is drawn in facet "B".
# Fixes: title typo ("Alternatiive"), and removal of a dead
# theme(legend.position = "none") that was overridden by the later theme()
# call -- the legend was, and still is, rendered at the bottom.
# (disabled layers: anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM)
df_seq %>%
  filter(EDITOR == "Altern") %>%
  filter(MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH, color = REPLICATE)) +
  geom_point(alpha = 0.5) +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  theme_bw() +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 806, xmax = 808, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  ylab("Proportion of mutated bases (background normalized)") +
  ylim(c(0, 0.015)) +
  ggtitle("Alternative gRNA site", subtitle = "All mutations") +
  xlab("Position on gene") +
  theme(
    aspect.ratio = 0.7,
    legend.position = "bottom"
  )
# gRNA7 samples: TOTAL_MUT_ENRICH along the gene with semi-transparent,
# uncoloured points, faceted by SITE; solid band at 108-110 in facet "A".
# No colour aesthetic is mapped, so the viridis colour scale has nothing to
# act on; it is retained unchanged.
# (disabled layers: anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM)
df_seq %>%
  filter(EDITOR == "gRNA7", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH)) +
  geom_point(alpha = 0.5) +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "A"),
    aes(xmin = 108, xmax = 110, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  ylim(0, 0.015) +
  labs(
    title = "gRNA 7",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "none", aspect.ratio = 0.7)
Trim edges
# x-axis window shared by the plots that follow (until it is reassigned).
xlim_nishida <- c(717, 816)

# WT samples: TOTAL_MUT_ENRICH restricted to the xlim_nishida window, no
# faceting; solid band at 764-767. Points outside the x limits are dropped
# by ggplot2, which is what triggers the "Removed ... rows" warning.
# (disabled layers: anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM)
df_seq %>%
  filter(EDITOR == "WT", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH)) +
  geom_point() +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 764, xmax = 767, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  xlim(xlim_nishida) +
  labs(
    title = "WT",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "none", aspect.ratio = 0.7)
## Warning: Removed 2025 rows containing missing values (geom_point).
# AID-only samples: TOTAL_MUT_ENRICH restricted to the xlim_nishida window,
# no faceting; solid band at 764-767. Points outside the x limits are
# dropped by ggplot2 (hence the "Removed ... rows" warning).
# (disabled layers: anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM)
df_seq %>%
  filter(EDITOR == "AID", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH)) +
  geom_point() +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 764, xmax = 767, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  xlim(xlim_nishida) +
  labs(
    title = "AID only",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "none", aspect.ratio = 0.7)
## Warning: Removed 1656 rows containing missing values (geom_point).
# Nishida gRNA samples: TOTAL_MUT_ENRICH restricted to the xlim_nishida
# window, no faceting; solid band at 764-767. Points outside the x limits
# are dropped by ggplot2 (hence the "Removed ... rows" warning).
# (disabled layers: anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM)
df_seq %>%
  filter(EDITOR == "Nish", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH)) +
  geom_point() +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 764, xmax = 767, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  xlim(xlim_nishida) +
  labs(
    title = "Nishida",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "none", aspect.ratio = 0.7)
## Warning: Removed 2025 rows containing missing values (geom_point).
Check PmCDA1 efficiency
# Reassign the shared x-axis window (the variable name is reused from the
# previous section) for the plots that follow.
xlim_nishida <- c(735, 807)

# WT samples: TOTAL_MUT_ENRICH restricted to the new window, no faceting;
# solid band at 764-766. Points outside the x limits are dropped by ggplot2
# (hence the "Removed ... rows" warning).
# (disabled layers: anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM)
df_seq %>%
  filter(EDITOR == "WT", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH)) +
  geom_point() +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 764, xmax = 766, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  xlim(xlim_nishida) +
  labs(
    title = "WT",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "none", aspect.ratio = 0.7)
## Warning: Removed 2142 rows containing missing values (geom_point).
# AID-only samples: TOTAL_MUT_ENRICH restricted to the xlim_nishida window,
# no faceting; solid band at 764-766. Points outside the x limits are
# dropped by ggplot2 (hence the "Removed ... rows" warning).
# (disabled layers: anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM)
df_seq %>%
  filter(EDITOR == "AID", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH)) +
  geom_point() +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 764, xmax = 766, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "black", inherit.aes = FALSE
  ) +
  xlim(xlim_nishida) +
  labs(
    title = "AID only",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "none", aspect.ratio = 0.7)
## Warning: Removed 1734 rows containing missing values (geom_point).
# PmCDA1 samples: TOTAL_MUT_ENRICH zoomed to positions 770-790, y axis
# limited to [0, 0.015]. A thin grey band at 781.9-782.1 is drawn first so
# the points are rendered on top of it.
# The black 764-766 band used in the sibling plots is disabled here:
#   geom_rect(data = data.frame(SITE = "B"),
#             aes(xmin = 764, xmax = 766, ymin = -Inf, ymax = Inf),
#             alpha = 1, fill = "black", inherit.aes = F)
# (disabled layers: anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM)
df_seq %>%
  filter(EDITOR == "PmCDA1", MUT_TYPE == "SNP") %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH)) +
  scale_color_viridis_d(option = "D") +
  geom_rect(
    data = data.frame(SITE = "B"),
    aes(xmin = 781.9, xmax = 782.1, ymin = -Inf, ymax = Inf),
    alpha = 1, fill = "grey", inherit.aes = FALSE
  ) +
  geom_point() +
  xlim(770, 790) +
  ylim(0, 0.015) +
  labs(
    title = "PmCDA1",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "none", aspect.ratio = 0.7)
## Warning: Removed 1164 rows containing missing values (geom_point).
# Print the PmCDA1 SNP rows at positions strictly between 780 and 785.
df_seq %>%
  filter(
    EDITOR == "PmCDA1",
    MUT_TYPE == "SNP",
    POS > 780,
    POS < 785
  )
| SAMPLE | POS | ID | REF | ALT | QUAL | FILTER | VCF_INFO | FORMAT | ALLELE | FREQ | MUT_TYPE | READ_DEPTH | EDIT_TYPE | EDITOR | SITE | REPLICATE | Kreads | >=Q30 | NORM_FREQ | LOG_NORM_FREQ | TOTAL_MUT_FREQ | LOG_TOTAL_MUT_FREQ | MEAN_TOTAL_MUT_FREQ_WT | MEAN_NORM_FREQ_WT | TOTAL_MUT_ENRICH | MUT_ENRICH |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| P20657_1066 | 781 | . | C | T,G,A | 0 | . | DP=3163412;I16=3.16247e+06,6,923,0,1.25865e+08,5.01079e+09,35334,1.36205e+06,1.89749e+08,1.13849e+10,55380,3.3228e+06,7.90619e+07,1.97655e+09,23075,576875;QS=65.98,0.018119,0.00120617,0.00070575;VDB=0;SGB=-15609;RPB=0.999216;MQB=1;MQSB=1;BQB=0;MQ0F=0 | PL:AD | T | 15 | SNP | 140560 | C_to_T | PmCDA1 | B | 1 | 155.45 | 96.56 | 0.0001138 | -13.100827 | 0.0001850 | -8.595293 | 0.0001677 | 0.0001214 | 0.0000173 | 0.0000000 |
| P20657_1066 | 781 | . | C | T,G,A | 0 | . | DP=3163412;I16=3.16247e+06,6,923,0,1.25865e+08,5.01079e+09,35334,1.36205e+06,1.89749e+08,1.13849e+10,55380,3.3228e+06,7.90619e+07,1.97655e+09,23075,576875;QS=65.98,0.018119,0.00120617,0.00070575;VDB=0;SGB=-15609;RPB=0.999216;MQB=1;MQSB=1;BQB=0;MQ0F=0 | PL:AD | G | 4 | SNP | 140560 | C_to_G | PmCDA1 | B | 1 | 155.45 | 96.56 | 0.0000356 | -14.778899 | 0.0001850 | -8.595293 | 0.0001677 | 0.0000284 | 0.0000173 | 0.0000071 |
| P20657_1066 | 781 | . | C | T,G,A | 0 | . | DP=3163412;I16=3.16247e+06,6,923,0,1.25865e+08,5.01079e+09,35334,1.36205e+06,1.89749e+08,1.13849e+10,55380,3.3228e+06,7.90619e+07,1.97655e+09,23075,576875;QS=65.98,0.018119,0.00120617,0.00070575;VDB=0;SGB=-15609;RPB=0.999216;MQB=1;MQSB=1;BQB=0;MQ0F=0 | PL:AD | A | 4 | SNP | 140560 | C_to_A | PmCDA1 | B | 1 | 155.45 | 96.56 | 0.0000356 | -14.778899 | 0.0001850 | -8.595293 | 0.0001677 | 0.0000178 | 0.0000173 | 0.0000178 |
| P20657_1066 | 782 | . | G | A,C,T | 0 | . | DP=3163409;I16=3.15948e+06,6,3919,0,1.26129e+08,5.03551e+09,154938,6.13251e+06,1.89569e+08,1.13741e+10,235140,1.41084e+07,7.89872e+07,1.97468e+09,97975,2.44938e+06;QS=65.8904,0.0720559,0.0357055,0.00188342;VDB=0;SGB=-77742.3;RPB=0.98563;MQB=1;MQSB=1;BQB=0;MQ0F=0 | PL:AD | A | 446 | SNP | 140561 | G_to_A | PmCDA1 | B | 1 | 155.45 | 96.56 | 0.0031801 | -8.296706 | 0.0062677 | -5.072339 | 0.0001221 | 0.0000812 | 0.0061456 | 0.0030989 |
| P20657_1066 | 782 | . | G | A,C,T | 0 | . | DP=3163409;I16=3.15948e+06,6,3919,0,1.26129e+08,5.03551e+09,154938,6.13251e+06,1.89569e+08,1.13741e+10,235140,1.41084e+07,7.89872e+07,1.97468e+09,97975,2.44938e+06;QS=65.8904,0.0720559,0.0357055,0.00188342;VDB=0;SGB=-77742.3;RPB=0.98563;MQB=1;MQSB=1;BQB=0;MQ0F=0 | PL:AD | C | 390 | SNP | 140561 | G_to_C | PmCDA1 | B | 1 | 155.45 | 96.56 | 0.0027817 | -8.489812 | 0.0062677 | -5.072339 | 0.0001221 | 0.0000165 | 0.0061456 | 0.0027652 |
| P20657_1066 | 782 | . | G | A,C,T | 0 | . | DP=3163409;I16=3.15948e+06,6,3919,0,1.26129e+08,5.03551e+09,154938,6.13251e+06,1.89569e+08,1.13741e+10,235140,1.41084e+07,7.89872e+07,1.97468e+09,97975,2.44938e+06;QS=65.8904,0.0720559,0.0357055,0.00188342;VDB=0;SGB=-77742.3;RPB=0.98563;MQB=1;MQSB=1;BQB=0;MQ0F=0 | PL:AD | T | 42 | SNP | 140561 | G_to_T | PmCDA1 | B | 1 | 155.45 | 96.56 | 0.0003059 | -11.674572 | 0.0062677 | -5.072339 | 0.0001221 | 0.0000244 | 0.0061456 | 0.0002815 |
| P20657_1066 | 783 | . | T | C,A,G | 0 | . | DP=3163362;I16=3.1628e+06,6,551,0,1.25982e+08,5.01913e+09,20617,779297,1.89768e+08,1.13861e+10,33060,1.9836e+06,7.90702e+07,1.97675e+09,13775,344375;QS=65.986,0.0131302,0.0008268,5.91829e-05;VDB=0;SGB=-9355.51;RPB=0.99991;MQB=1;MQSB=1;BQB=0;MQ0F=0 | PL:AD | C | 21 | SNP | 140559 | T_to_C | PmCDA1 | B | 1 | 155.45 | 96.56 | 0.0001565 | -12.641385 | 0.0001992 | -8.521178 | 0.0002042 | 0.0001514 | 0.0000000 | 0.0000051 |
| P20657_1066 | 783 | . | T | C,A,G | 0 | . | DP=3163362;I16=3.1628e+06,6,551,0,1.25982e+08,5.01913e+09,20617,779297,1.89768e+08,1.13861e+10,33060,1.9836e+06,7.90702e+07,1.97675e+09,13775,344375;QS=65.986,0.0131302,0.0008268,5.91829e-05;VDB=0;SGB=-9355.51;RPB=0.99991;MQB=1;MQSB=1;BQB=0;MQ0F=0 | PL:AD | A | 3 | SNP | 140559 | T_to_A | PmCDA1 | B | 1 | 155.45 | 96.56 | 0.0000285 | -15.100816 | 0.0001992 | -8.521178 | 0.0002042 | 0.0000475 | 0.0000000 | 0.0000000 |
| P20657_1066 | 783 | . | T | C,A,G | 0 | . | DP=3163362;I16=3.1628e+06,6,551,0,1.25982e+08,5.01913e+09,20617,779297,1.89768e+08,1.13861e+10,33060,1.9836e+06,7.90702e+07,1.97675e+09,13775,344375;QS=65.986,0.0131302,0.0008268,5.91829e-05;VDB=0;SGB=-9355.51;RPB=0.99991;MQB=1;MQSB=1;BQB=0;MQ0F=0 | PL:AD | G | 1 | SNP | 140559 | T_to_G | PmCDA1 | B | 1 | 155.45 | 96.56 | 0.0000142 | -16.100816 | 0.0001992 | -8.521178 | 0.0002042 | 0.0000053 | 0.0000000 | 0.0000089 |
| P20657_1066 | 784 | . | T | A,C,G | 0 | . | DP=3163412;I16=3.16294e+06,6,415,0,1.26185e+08,5.03467e+09,15965,617977,1.89777e+08,1.13866e+10,24900,1.494e+06,7.90736e+07,1.97684e+09,10375,259375;QS=65.9798,0.0176772,0.00244375,3.88912e-05;VDB=0;SGB=-7069.3;RPB=0.958516;MQB=1;MQSB=1;BQB=1.03763e-29;MQ0F=0 | PL:AD | A | 1 | SNP | 140559 | T_to_A | PmCDA1 | B | 1 | 155.45 | 96.56 | 0.0000142 | -16.100816 | 0.0001565 | -8.762340 | 0.0001608 | 0.0000105 | 0.0000000 | 0.0000038 |
| P20657_1066 | 784 | . | T | A,C,G | 0 | . | DP=3163412;I16=3.16294e+06,6,415,0,1.26185e+08,5.03467e+09,15965,617977,1.89777e+08,1.13866e+10,24900,1.494e+06,7.90736e+07,1.97684e+09,10375,259375;QS=65.9798,0.0176772,0.00244375,3.88912e-05;VDB=0;SGB=-7069.3;RPB=0.958516;MQB=1;MQSB=1;BQB=1.03763e-29;MQ0F=0 | PL:AD | C | 18 | SNP | 140559 | T_to_C | PmCDA1 | B | 1 | 155.45 | 96.56 | 0.0001352 | -12.852889 | 0.0001565 | -8.762340 | 0.0001608 | 0.0001450 | 0.0000000 | 0.0000000 |
| P20657_1066 | 784 | . | T | A,C,G | 0 | . | DP=3163412;I16=3.16294e+06,6,415,0,1.26185e+08,5.03467e+09,15965,617977,1.89777e+08,1.13866e+10,24900,1.494e+06,7.90736e+07,1.97684e+09,10375,259375;QS=65.9798,0.0176772,0.00244375,3.88912e-05;VDB=0;SGB=-7069.3;RPB=0.958516;MQB=1;MQSB=1;BQB=1.03763e-29;MQ0F=0 | PL:AD | G | 0 | SNP | 140559 | T_to_G | PmCDA1 | B | 1 | 155.45 | 96.56 | 0.0000071 | -17.100816 | 0.0001565 | -8.762340 | 0.0001608 | 0.0000053 | 0.0000000 | 0.0000018 |
2x
# 2x editor: TOTAL_MUT_ENRICH per position for SNP rows, one panel per SITE.
df_seq %>%
  filter(
    EDITOR == "2x",
    MUT_TYPE == "SNP",
    !is.na(TOTAL_MUT_ENRICH)
  ) %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH)) +
  # Disabled overlays: anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM,
  # and a black geom_rect() spanning 764-766 on SITE "B".
  geom_point() +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  # No colour aesthetic is mapped in this plot, so the former viridis colour
  # scale and legend.position = "none" had no effect and were removed.
  ylim(0, 0.015) +
  labs(
    title = "2x",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(aspect.ratio = 0.7)
# 2x editor: TOTAL_MUT_ENRICH per position for SNP rows, one panel per SITE,
# points coloured by the REF column.
df_seq %>%
  filter(
    EDITOR == "2x",
    MUT_TYPE == "SNP",
    !is.na(TOTAL_MUT_ENRICH)
  ) %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH, color = REF)) +
  # Disabled overlays: anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM,
  # and a black geom_rect() spanning 764-766 on SITE "B".
  geom_point() +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  ylim(0, 0.015) +
  labs(
    title = "2x",
    subtitle = "All mutations, colored by edited base",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "bottom", aspect.ratio = 0.7)
3x
# 3x editor: TOTAL_MUT_ENRICH per position for SNP rows, one panel per SITE.
df_seq %>%
  filter(
    EDITOR == "3x",
    MUT_TYPE == "SNP",
    !is.na(TOTAL_MUT_ENRICH)
  ) %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH)) +
  # Disabled overlays: anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM,
  # and a black geom_rect() spanning 764-766 on SITE "B".
  geom_point() +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  # No colour aesthetic is mapped in this plot, so the former viridis colour
  # scale and legend.position = "none" had no effect and were removed.
  ylim(0, 0.015) +
  labs(
    title = "3x",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(aspect.ratio = 0.7)
# 3x editor, coloured by edited base: TOTAL_MUT_ENRICH per position for SNP
# rows, one panel per SITE, colour mapped to the REF column.
df_seq %>%
  filter(
    EDITOR == "3x",
    MUT_TYPE == "SNP",
    !is.na(TOTAL_MUT_ENRICH)
  ) %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH, color = REF)) +
  # Disabled overlays: anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM,
  # and a black geom_rect() spanning 764-766 on SITE "B".
  geom_point() +
  facet_wrap(vars(SITE), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  ylim(0, 0.015) +
  labs(
    title = "3x",
    subtitle = "All mutations, colored by edited base",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "bottom", aspect.ratio = 0.7)
3x & WT together
# WT and 3x side by side: TOTAL_MUT_ENRICH per position for SNP rows, one
# panel per SITE x EDITOR combination. No y limit is applied here.
df_seq %>%
  filter(
    EDITOR %in% c("WT", "3x"),
    MUT_TYPE == "SNP",
    !is.na(TOTAL_MUT_ENRICH)
  ) %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH, color = EDITOR)) +
  # Disabled overlays: anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM,
  # and a black geom_rect() spanning 764-766 on SITE "B".
  geom_point() +
  facet_wrap(vars(SITE, EDITOR), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  labs(
    # The data contain both WT and 3x; the title previously read just "3x".
    title = "3x & WT",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  # The facet strips already name the editor, so the colour legend is hidden.
  theme(legend.position = "none", aspect.ratio = 0.7)
3x, faceted by replicate
# 3x editor split by replicate: TOTAL_MUT_ENRICH per position for SNP rows,
# one panel per SITE x REPLICATE combination, coloured by SITE.
df_seq %>%
  filter(
    EDITOR %in% c("3x"),
    MUT_TYPE == "SNP",
    !is.na(TOTAL_MUT_ENRICH)
  ) %>%
  ggplot(aes(x = POS, y = TOTAL_MUT_ENRICH, color = SITE)) +
  # Disabled overlays: anno_rect_baseEditWindow, anno_rect_gRNA, anno_rect_PAM,
  # and a black geom_rect() spanning 764-766 on SITE "B".
  geom_point() +
  facet_wrap(vars(SITE, REPLICATE), ncol = 3, scales = "free_x") +
  scale_color_viridis_d(option = "D") +
  labs(
    title = "3x",
    subtitle = "All mutations",
    x = "Position on gene",
    y = "Proportion of mutated bases (background normalized)"
  ) +
  theme_bw() +
  theme(legend.position = "none", aspect.ratio = 0.7)
WT
# WT: MUT_ENRICH per ALT allele, one panel per reference base, y capped at
# 0.025.
# NOTE(review): this chunk is identical to the one that follows it; the 2x/3x
# series start at ylim(0, 0.25) -- confirm whether 0.25 was intended here.
df_seq %>%
  filter(EDITOR %in% c("WT"), MUT_TYPE == "SNP") %>%
  # filter(SITE == "A") %>%   # disabled: no SITE restriction is applied
  mutate(
    REF = factor(REF, levels = c("A", "C", "G", "T")),
    ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))
  ) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = MUT_ENRICH, group = ALLELE, color = ALLELE)) +
  # Jitter horizontally only. The default also perturbs y, which distorts the
  # values and pushes zeros below the lower limit, where ylim() drops a random
  # subset of them on every run.
  geom_jitter(height = 0) +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 740 rows containing missing values (geom_point).
# WT: MUT_ENRICH per ALT allele, one panel per reference base, y capped at
# 0.025.
df_seq %>%
  filter(EDITOR %in% c("WT"), MUT_TYPE == "SNP") %>%
  # filter(SITE == "A") %>%   # disabled: no SITE restriction is applied
  mutate(
    REF = factor(REF, levels = c("A", "C", "G", "T")),
    ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))
  ) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = MUT_ENRICH, group = ALLELE, color = ALLELE)) +
  # Jitter horizontally only. The default also perturbs y, which distorts the
  # values and pushes zeros below the lower limit, where ylim() drops a random
  # subset of them on every run.
  geom_jitter(height = 0) +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 719 rows containing missing values (geom_point).
# WT: MUT_ENRICH per ALT allele, one panel per reference base, zoomed to
# y <= 0.0025.
df_seq %>%
  filter(EDITOR %in% c("WT"), MUT_TYPE == "SNP") %>%
  # filter(SITE == "A") %>%   # disabled: no SITE restriction is applied
  mutate(
    REF = factor(REF, levels = c("A", "C", "G", "T")),
    ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))
  ) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = MUT_ENRICH, group = ALLELE, color = ALLELE)) +
  # Jitter horizontally only. The default also perturbs y, which distorts the
  # values and pushes zeros below the lower limit, where ylim() drops a random
  # subset of them on every run.
  geom_jitter(height = 0) +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 734 rows containing missing values (geom_point).
AID
# AID: MUT_ENRICH per ALT allele, one panel per reference base, y capped at
# 0.025.
# NOTE(review): this chunk is identical to the one that follows it; the 2x/3x
# series start at ylim(0, 0.25) -- confirm whether 0.25 was intended here.
df_seq %>%
  filter(EDITOR %in% c("AID"), MUT_TYPE == "SNP") %>%
  mutate(
    REF = factor(REF, levels = c("A", "C", "G", "T")),
    ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))
  ) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = MUT_ENRICH, group = ALLELE, color = ALLELE)) +
  # Jitter horizontally only. The default also perturbs y, which distorts the
  # values and pushes zeros below the lower limit, where ylim() drops a random
  # subset of them on every run.
  geom_jitter(height = 0) +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 549 rows containing missing values (geom_point).
# AID: MUT_ENRICH per ALT allele, one panel per reference base, y capped at
# 0.025.
df_seq %>%
  filter(EDITOR %in% c("AID"), MUT_TYPE == "SNP") %>%
  mutate(
    REF = factor(REF, levels = c("A", "C", "G", "T")),
    ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))
  ) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = MUT_ENRICH, group = ALLELE, color = ALLELE)) +
  # Jitter horizontally only. The default also perturbs y, which distorts the
  # values and pushes zeros below the lower limit, where ylim() drops a random
  # subset of them on every run.
  geom_jitter(height = 0) +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 586 rows containing missing values (geom_point).
# AID: MUT_ENRICH per ALT allele, one panel per reference base, zoomed to
# y <= 0.0025.
df_seq %>%
  filter(EDITOR %in% c("AID"), MUT_TYPE == "SNP") %>%
  mutate(
    REF = factor(REF, levels = c("A", "C", "G", "T")),
    ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))
  ) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = MUT_ENRICH, group = ALLELE, color = ALLELE)) +
  # Jitter horizontally only. The default also perturbs y, which distorts the
  # values and pushes zeros below the lower limit, where ylim() drops a random
  # subset of them on every run.
  geom_jitter(height = 0) +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 552 rows containing missing values (geom_point).
2x
# 2x: MUT_ENRICH per ALT allele, one panel per reference base, y capped at
# 0.25.
df_seq %>%
  filter(EDITOR %in% c("2x"), MUT_TYPE == "SNP") %>%
  mutate(
    REF = factor(REF, levels = c("A", "C", "G", "T")),
    ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))
  ) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = MUT_ENRICH, group = ALLELE, color = ALLELE)) +
  # Jitter horizontally only. The default also perturbs y, which distorts the
  # values and pushes zeros below the lower limit, where ylim() drops a random
  # subset of them on every run.
  geom_jitter(height = 0) +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.25) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 622 rows containing missing values (geom_point).
# 2x: MUT_ENRICH per ALT allele, one panel per reference base, y capped at
# 0.025.
df_seq %>%
  filter(EDITOR %in% c("2x"), MUT_TYPE == "SNP") %>%
  mutate(
    REF = factor(REF, levels = c("A", "C", "G", "T")),
    ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))
  ) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = MUT_ENRICH, group = ALLELE, color = ALLELE)) +
  # Jitter horizontally only. The default also perturbs y, which distorts the
  # values and pushes zeros below the lower limit, where ylim() drops a random
  # subset of them on every run.
  geom_jitter(height = 0) +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 630 rows containing missing values (geom_point).
# 2x: MUT_ENRICH per ALT allele, one panel per reference base, zoomed to
# y <= 0.0025.
df_seq %>%
  filter(EDITOR %in% c("2x"), MUT_TYPE == "SNP") %>%
  mutate(
    REF = factor(REF, levels = c("A", "C", "G", "T")),
    ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))
  ) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = MUT_ENRICH, group = ALLELE, color = ALLELE)) +
  # Jitter horizontally only. The default also perturbs y, which distorts the
  # values and pushes zeros below the lower limit, where ylim() drops a random
  # subset of them on every run.
  geom_jitter(height = 0) +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 629 rows containing missing values (geom_point).
3x
# 3x: MUT_ENRICH per ALT allele, one panel per reference base, y capped at
# 0.25.
df_seq %>%
  filter(EDITOR %in% c("3x"), MUT_TYPE == "SNP") %>%
  mutate(
    REF = factor(REF, levels = c("A", "C", "G", "T")),
    ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))
  ) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = MUT_ENRICH, group = ALLELE, color = ALLELE)) +
  # Jitter horizontally only. The default also perturbs y, which distorts the
  # values and pushes zeros below the lower limit, where ylim() drops a random
  # subset of them on every run.
  geom_jitter(height = 0) +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.25) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 569 rows containing missing values (geom_point).
# 3x: MUT_ENRICH per ALT allele, one panel per reference base, y capped at
# 0.025.
df_seq %>%
  filter(EDITOR %in% c("3x"), MUT_TYPE == "SNP") %>%
  mutate(
    REF = factor(REF, levels = c("A", "C", "G", "T")),
    ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))
  ) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = MUT_ENRICH, group = ALLELE, color = ALLELE)) +
  # Jitter horizontally only. The default also perturbs y, which distorts the
  # values and pushes zeros below the lower limit, where ylim() drops a random
  # subset of them on every run.
  geom_jitter(height = 0) +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 548 rows containing missing values (geom_point).
# 3x: MUT_ENRICH per ALT allele, one panel per reference base, zoomed to
# y <= 0.0025.
df_seq %>%
  filter(EDITOR %in% c("3x"), MUT_TYPE == "SNP") %>%
  mutate(
    REF = factor(REF, levels = c("A", "C", "G", "T")),
    ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))
  ) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = MUT_ENRICH, group = ALLELE, color = ALLELE)) +
  # Jitter horizontally only. The default also perturbs y, which distorts the
  # values and pushes zeros below the lower limit, where ylim() drops a random
  # subset of them on every run.
  geom_jitter(height = 0) +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 565 rows containing missing values (geom_point).
# Two stacked panels comparing MUT_ENRICH per ALT allele (faceted by reference
# base, y <= 0.0025): WT on top, all gRNA-carrying editors pooled below.
# Both plots jitter horizontally only (height = 0): the default also perturbs
# y, which distorts the values and pushes zeros below the lower limit, where
# ylim() drops a random subset of them on every run.
p_trans_wt <- df_seq %>%
  filter(EDITOR %in% c("WT"), MUT_TYPE == "SNP") %>%
  mutate(
    REF = factor(REF, levels = c("A", "C", "G", "T")),
    ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))
  ) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = MUT_ENRICH, group = ALLELE, color = ALLELE)) +
  geom_jitter(height = 0) +
  theme_bw() +
  ggtitle("WT") +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  ylab("Proportion of mutated bases (background normalized)") +
  theme(legend.position = "none")

p_trans_gRNA <- df_seq %>%
  filter(
    EDITOR %in% c("Nish", "gRNA7", "Altern", "2x", "3x"),
    MUT_TYPE == "SNP"
  ) %>%
  mutate(
    REF = factor(REF, levels = c("A", "C", "G", "T")),
    ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))
  ) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = MUT_ENRICH, group = ALLELE, color = ALLELE)) +
  geom_jitter(height = 0) +
  ggtitle("All gRNAs tabulated") +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  ylab("Proportion of mutated bases (background normalized)") +
  theme(legend.position = "none")

# patchwork's `/` stacks the two plots vertically.
p_trans_wt / p_trans_gRNA
## Warning: Removed 726 rows containing missing values (geom_point).
## Warning: Removed 3436 rows containing missing values (geom_point).
# WT, SITE "A" only: MUT_ENRICH per ALT allele, one panel per reference base,
# zoomed to y <= 0.0025.
df_seq %>%
  filter(
    EDITOR %in% c("WT"),
    MUT_TYPE == "SNP",
    SITE == "A"
  ) %>%
  mutate(
    REF = factor(REF, levels = c("A", "C", "G", "T")),
    ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))
  ) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = MUT_ENRICH, group = ALLELE, color = ALLELE)) +
  # Jitter horizontally only. The default also perturbs y, which distorts the
  # values and pushes zeros below the lower limit, where ylim() drops a random
  # subset of them on every run.
  geom_jitter(height = 0) +
  theme_bw() +
  ggtitle("WT") +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 247 rows containing missing values (geom_point).
# All gRNA-carrying editors pooled, SITE "A" only: MUT_ENRICH per ALT allele,
# one panel per reference base, zoomed to y <= 0.0025.
df_seq %>%
  filter(
    EDITOR %in% c("Nish", "gRNA7", "Altern", "2x", "3x"),
    MUT_TYPE == "SNP",
    SITE == "A"
  ) %>%
  mutate(
    REF = factor(REF, levels = c("A", "C", "G", "T")),
    ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))
  ) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = MUT_ENRICH, group = ALLELE, color = ALLELE)) +
  # Jitter horizontally only. The default also perturbs y, which distorts the
  # values and pushes zeros below the lower limit, where ylim() drops a random
  # subset of them on every run.
  geom_jitter(height = 0) +
  ggtitle("All gRNAs tabulated") +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 1075 rows containing missing values (geom_point).
Calculate noise threshold
# WT noise floor: distribution of MUT_ENRICH over all WT SNP rows, drawn as a
# single box with the individual observations overlaid.
df_seq %>%
  filter(EDITOR %in% c("WT"), MUT_TYPE == "SNP") %>%
  mutate(
    REF = factor(REF, levels = c("A", "C", "G", "T")),
    ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))
  ) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  # Constant dummy x value so every row falls into one box (this overwrites
  # the ID column for the duration of the pipeline only).
  mutate(ID = "ID") %>%
  ggplot(aes(y = MUT_ENRICH, x = ID)) +
  # `draw_quantiles` is a geom_violin() argument; geom_boxplot() ignored it
  # with an "unknown parameters" warning, so it has been dropped.
  geom_boxplot() +
  # Horizontal jitter only, so points keep their true y value and zeros are
  # not pushed below the visible range.
  geom_jitter(alpha = 0.1, height = 0) +
  theme_bw() +
  ggtitle("WT") +
  # Zoom with the coordinate system rather than ylim(): ylim() discards
  # out-of-range rows before the box statistics are computed.
  coord_cartesian(ylim = c(0, 0.00025)) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Ignoring unknown parameters: draw_quantiles
## Warning: Removed 729 rows containing missing values (geom_point).
Define the 99% quantile of the WT data (replicate 3 is excluded from this calculation)
# 25% and 99% quantiles of MUT_ENRICH over WT SNP rows, excluding replicate 3.
# Returns a two-row table with columns `quantile` (label) and
# `MUT_ENRICH_quant` (value).
# NOTE(review): the plots later in this file draw a threshold line at
# 5.5723e-05, which differs from the 99% value this chunk prints (4.63e-05) --
# confirm which value is intended.
df_seq %>%
  filter(
    EDITOR %in% c("WT"),
    MUT_TYPE == "SNP",
    REPLICATE != "3",
    !is.na(MUT_ENRICH)
  ) %>%
  # The former REF/ALLELE factor conversion and dummy ID column were not used
  # by the summary and have been removed.
  summarise(
    quantile = scales::percent(c(0.25, 0.99)),
    # Namespace-qualified because the output column above is also named
    # `quantile`.
    MUT_ENRICH_quant = stats::quantile(MUT_ENRICH, c(0.25, 0.99))
  )
| quantile | MUT_ENRICH_quant |
|---|---|
| 25% | 0.00e+00 |
| 99% | 4.63e-05 |
Compare the editing profiles of WT, AID only, single gRNAs only, multiplexed gRNAs only, and PmCDA1 only. Note that the single-gRNA profiles also include off-target activity!
# WT: NORM_FREQ per ALT allele against the fixed threshold line, one panel
# per reference base, y <= 0.0025.
# NOTE(review): the y label says "background normalized" but the plotted
# column is NORM_FREQ, not MUT_ENRICH -- confirm which one is intended.
df_seq %>%
  filter(EDITOR %in% c("WT"), MUT_TYPE == "SNP") %>%
  mutate(
    REF = factor(REF, levels = c("A", "C", "G", "T")),
    ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))
  ) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = NORM_FREQ, group = ALLELE, color = ALLELE)) +
  # Horizontal jitter only: default vertical jitter would move points
  # relative to the threshold line and change which ones ylim() drops.
  geom_jitter(height = 0) +
  geom_hline(yintercept = 5.5723e-05) +
  theme_bw() +
  ggtitle("WT") +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 6 rows containing missing values (geom_point).
# AID: NORM_FREQ per ALT allele against the fixed threshold line, one panel
# per reference base, y <= 0.0025.
# NOTE(review): the y label says "background normalized" but the plotted
# column is NORM_FREQ, not MUT_ENRICH -- confirm which one is intended.
df_seq %>%
  filter(EDITOR %in% c("AID"), MUT_TYPE == "SNP") %>%
  mutate(
    REF = factor(REF, levels = c("A", "C", "G", "T")),
    ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))
  ) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = NORM_FREQ, group = ALLELE, color = ALLELE)) +
  # Horizontal jitter only: default vertical jitter would move points
  # relative to the threshold line and change which ones ylim() drops.
  geom_jitter(height = 0) +
  geom_hline(yintercept = 5.5723e-05) +
  theme_bw() +
  ggtitle("AID") +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 5 rows containing missing values (geom_point).
# All gRNA-carrying editors pooled: NORM_FREQ per ALT allele against the
# fixed threshold line, one panel per reference base, y <= 0.0025.
# NOTE(review): the y label says "background normalized" but the plotted
# column is NORM_FREQ, not MUT_ENRICH -- confirm which one is intended.
df_seq %>%
  filter(
    EDITOR %in% c("Nish", "gRNA7", "Altern", "2x", "3x"),
    MUT_TYPE == "SNP"
  ) %>%
  mutate(
    REF = factor(REF, levels = c("A", "C", "G", "T")),
    ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))
  ) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = NORM_FREQ, group = ALLELE, color = ALLELE)) +
  # Horizontal jitter only: default vertical jitter would move points
  # relative to the threshold line and change which ones ylim() drops.
  geom_jitter(height = 0) +
  geom_hline(yintercept = 5.5723e-05) +
  ggtitle("All gRNAs tabulated") +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 38 rows containing missing values (geom_point).
# Single-gRNA editors only: NORM_FREQ per ALT allele against the fixed
# threshold line, one panel per reference base, y <= 0.0025.
# NOTE(review): the y label says "background normalized" but the plotted
# column is NORM_FREQ, not MUT_ENRICH -- confirm which one is intended.
df_seq %>%
  filter(
    EDITOR %in% c("Nish", "gRNA7", "Altern"),
    MUT_TYPE == "SNP"
  ) %>%
  mutate(
    REF = factor(REF, levels = c("A", "C", "G", "T")),
    ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))
  ) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = NORM_FREQ, group = ALLELE, color = ALLELE)) +
  # Horizontal jitter only: default vertical jitter would move points
  # relative to the threshold line and change which ones ylim() drops.
  geom_jitter(height = 0) +
  geom_hline(yintercept = 5.5723e-05) +
  ggtitle("single gRNAs only") +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 19 rows containing missing values (geom_point).
# Multiplexed editors (2x, 3x) only: NORM_FREQ per ALT allele against the
# fixed threshold line, one panel per reference base, y <= 0.0025.
# NOTE(review): the y label says "background normalized" but the plotted
# column is NORM_FREQ, not MUT_ENRICH -- confirm which one is intended.
df_seq %>%
  filter(EDITOR %in% c("2x", "3x"), MUT_TYPE == "SNP") %>%
  mutate(
    REF = factor(REF, levels = c("A", "C", "G", "T")),
    ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))
  ) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = NORM_FREQ, group = ALLELE, color = ALLELE)) +
  # Horizontal jitter only: default vertical jitter would move points
  # relative to the threshold line and change which ones ylim() drops.
  geom_jitter(height = 0) +
  geom_hline(yintercept = 5.5723e-05) +
  ggtitle("multiplexed gRNAs only") +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 19 rows containing missing values (geom_point).
# PmCDA1 only: NORM_FREQ per ALT allele against the fixed threshold line, one
# panel per reference base, y <= 0.0025.
# NOTE(review): the y label says "background normalized" but the plotted
# column is NORM_FREQ, not MUT_ENRICH -- confirm which one is intended.
df_seq %>%
  filter(EDITOR %in% c("PmCDA1"), MUT_TYPE == "SNP") %>%
  mutate(
    REF = factor(REF, levels = c("A", "C", "G", "T")),
    ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))
  ) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = NORM_FREQ, group = ALLELE, color = ALLELE)) +
  # Horizontal jitter only: default vertical jitter would move points
  # relative to the threshold line and change which ones ylim() drops.
  geom_jitter(height = 0) +
  geom_hline(yintercept = 5.5723e-05) +
  ggtitle("PmCDA1 only") +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 5 rows containing missing values (geom_point).
Exclude fragment C from plots
# WT without SITE "C": NORM_FREQ per ALT allele against the fixed threshold
# line, one panel per reference base, y <= 0.0025.
# NOTE(review): the y label says "background normalized" but the plotted
# column is NORM_FREQ, not MUT_ENRICH -- confirm which one is intended.
df_seq %>%
  filter(
    EDITOR %in% c("WT"),
    MUT_TYPE == "SNP",
    SITE != "C"
  ) %>%
  mutate(
    REF = factor(REF, levels = c("A", "C", "G", "T")),
    ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))
  ) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = NORM_FREQ, group = ALLELE, color = ALLELE)) +
  # Horizontal jitter only: default vertical jitter would move points
  # relative to the threshold line and change which ones ylim() drops.
  geom_jitter(height = 0) +
  geom_hline(yintercept = 5.5723e-05) +
  theme_bw() +
  ggtitle("WT") +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 3 rows containing missing values (geom_point).
# AID without SITE "C": NORM_FREQ per ALT allele against the fixed threshold
# line, one panel per reference base, y <= 0.0025.
# NOTE(review): the y label says "background normalized" but the plotted
# column is NORM_FREQ, not MUT_ENRICH -- confirm which one is intended.
df_seq %>%
  filter(
    EDITOR %in% c("AID"),
    MUT_TYPE == "SNP",
    SITE != "C"
  ) %>%
  mutate(
    REF = factor(REF, levels = c("A", "C", "G", "T")),
    ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))
  ) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = NORM_FREQ, group = ALLELE, color = ALLELE)) +
  # Horizontal jitter only: default vertical jitter would move points
  # relative to the threshold line and change which ones ylim() drops.
  geom_jitter(height = 0) +
  geom_hline(yintercept = 5.5723e-05) +
  theme_bw() +
  ggtitle("AID") +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 3 rows containing missing values (geom_point).
# All gRNA-carrying editors pooled, without SITE "C": NORM_FREQ per ALT
# allele against the fixed threshold line, one panel per reference base,
# y <= 0.0025.
# NOTE(review): the y label says "background normalized" but the plotted
# column is NORM_FREQ, not MUT_ENRICH -- confirm which one is intended.
df_seq %>%
  filter(
    EDITOR %in% c("Nish", "gRNA7", "Altern", "2x", "3x"),
    MUT_TYPE == "SNP",
    SITE != "C"
  ) %>%
  mutate(
    REF = factor(REF, levels = c("A", "C", "G", "T")),
    ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))
  ) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = NORM_FREQ, group = ALLELE, color = ALLELE)) +
  # Horizontal jitter only: default vertical jitter would move points
  # relative to the threshold line and change which ones ylim() drops.
  geom_jitter(height = 0) +
  geom_hline(yintercept = 5.5723e-05) +
  ggtitle("All gRNAs tabulated") +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 24 rows containing missing values (geom_point).
# Single-gRNA editors only, without SITE "C": NORM_FREQ per ALT allele
# against the fixed threshold line, one panel per reference base, y <= 0.0025.
# NOTE(review): the y label says "background normalized" but the plotted
# column is NORM_FREQ, not MUT_ENRICH -- confirm which one is intended.
df_seq %>%
  filter(
    EDITOR %in% c("Nish", "gRNA7", "Altern"),
    MUT_TYPE == "SNP",
    SITE != "C"
  ) %>%
  mutate(
    REF = factor(REF, levels = c("A", "C", "G", "T")),
    ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))
  ) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = NORM_FREQ, group = ALLELE, color = ALLELE)) +
  # Horizontal jitter only: default vertical jitter would move points
  # relative to the threshold line and change which ones ylim() drops.
  geom_jitter(height = 0) +
  geom_hline(yintercept = 5.5723e-05) +
  ggtitle("single gRNAs only") +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 10 rows containing missing values (geom_point).
# Multiplexed editors (2x, 3x) only, without SITE "C": NORM_FREQ per ALT
# allele against the fixed threshold line, one panel per reference base,
# y <= 0.0025.
# NOTE(review): the y label says "background normalized" but the plotted
# column is NORM_FREQ, not MUT_ENRICH -- confirm which one is intended.
df_seq %>%
  filter(
    EDITOR %in% c("2x", "3x"),
    MUT_TYPE == "SNP",
    SITE != "C"
  ) %>%
  mutate(
    REF = factor(REF, levels = c("A", "C", "G", "T")),
    ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))
  ) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = NORM_FREQ, group = ALLELE, color = ALLELE)) +
  # Horizontal jitter only: default vertical jitter would move points
  # relative to the threshold line and change which ones ylim() drops.
  geom_jitter(height = 0) +
  geom_hline(yintercept = 5.5723e-05) +
  ggtitle("multiplexed gRNAs only") +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 14 rows containing missing values (geom_point).
# PmCDA1 only, without SITE "C": NORM_FREQ per ALT allele against the fixed
# threshold line, one panel per reference base, y <= 0.0025.
# NOTE(review): the y label says "background normalized" but the plotted
# column is NORM_FREQ, not MUT_ENRICH -- confirm which one is intended.
df_seq %>%
  filter(
    EDITOR %in% c("PmCDA1"),
    MUT_TYPE == "SNP",
    SITE != "C"
  ) %>%
  mutate(
    REF = factor(REF, levels = c("A", "C", "G", "T")),
    ALLELE = factor(ALLELE, levels = c("A", "C", "G", "T", "D"))
  ) %>%
  filter(!is.na(MUT_ENRICH)) %>%
  ggplot(aes(x = ALLELE, y = NORM_FREQ, group = ALLELE, color = ALLELE)) +
  # Horizontal jitter only: default vertical jitter would move points
  # relative to the threshold line and change which ones ylim() drops.
  geom_jitter(height = 0) +
  geom_hline(yintercept = 5.5723e-05) +
  ggtitle("PmCDA1 only") +
  theme_bw() +
  facet_wrap(vars(REF), ncol = 4) +
  ylim(0, 0.0025) +
  ylab("Proportion of mutated bases (background normalized)")
## Warning: Removed 2 rows containing missing values (geom_point).